***Panel for earnings including zeros***
* Session setup: pin the interpreter version, close any log left open by an
* earlier run, start from an empty session and disable output paging.
version 17.0
capture log close
clear all
set more off
* All relative paths below (use\...) are resolved against this directory.
cd "P:\2018\186"

*1) Creating a dataset in wide format
* Builds use\incwide2: one row per idnr with one earnings column per source
* year (earn1968 ... earn2019), deflated to 2016 prices.
* Only idnr, woman and yob are loaded, restricted to non-missing sex and
* birth year and to cohorts born 1998 or earlier.
* -clear- is the documented option of -use-.
use idnr woman yob if yob!=. & woman!=. & yob<=1998 using "use\birthrecords", clear
*Earnings from IoT
* keep(1 3) retains every person from the birth records whether or not a
* given source file has a record for them.
foreach x of numlist 1968 1971 1973 1976 1979 1982 {
	sort idnr
	merge 1:1 idnr using "use\iot`x'", keepusing(earn`x') keep(1 3) nogen
}
*Earnings from FoB 1970, 1975, and 1980
forvalues i=1970(5)1980 {
	sort idnr
	merge 1:1 idnr using "use\fob`i'", keepusing(earn`i') keep(1 3) nogen
}
*Earnings from LOUISE 1985-2019
* yob is listed in keepusing, but merge is run without update/replace, so
* the birth-record value already in memory is the one that is kept.
forvalues i=1985/2019 {
	sort idnr
	merge 1:1 idnr using "use\louise`i'", keepusing(yob earn`i') keep(1 3) nogen
}
*Dropping individuals with no observed earnings
* rowmean is missing only when every earnings column is missing.
* The range earn1968-earn2019 follows the column order created by the
* merges above: earn1968 is merged first and earn2019 last.
egen t=rowmean(earn1968-earn2019)
drop if t==.
drop t
*Real earnings in 2016 prices
* Each token is year:index. Earnings are multiplied by base/index and
* rounded to whole units. The base 4623 is the index value listed for 2016.
local base 4623
foreach pair in 1968:551 1970:605 1971:650 1973:735 1975:887 1976:979 ///
	1979:1286 1980:1461 1982:1778 ///
	1985:2246 1986:2341 1987:2440 1988:2582 1989:2748 ///
	1990:3036 1991:3319 1992:3395 1993:3553 1994:3631 ///
	1995:3723 1996:3740 1997:3760 1998:3754 1999:3772 ///
	2000:3809 2001:3902 2002:3986 2003:4063 2004:4078 ///
	2005:4097 2006:4153 2007:4243 2008:4390 2009:4378 ///
	2010:4434 2011:4550 2012:4590 2013:4588 2014:4580 ///
	2015:4578 2016:4623 2017:4706 2018:4798 2019:4884 {
	local y=substr("`pair'",1,4)
	local c=substr("`pair'",6,.)
	replace earn`y'=round(earn`y'*(`base'/`c'),1)
}
compress
sort idnr
order idnr earn1968-earn2019
save "use\incwide2", replace

*2) Creating a panel
* Reshapes the wide earnings file to one row per idnr and year, keeps ages
* 18-65, and adds an employment indicator. The result is use\incpanel2.
* -clear- is the documented option of -use-.
use "use\incwide2", clear
reshape long earn, i(idnr) j(year)
g age=year-yob
*Keeping observations between 18 and 65 years of age
keep if age>=18 & age<=65
lab var age  "Age"
lab var earn "Income"
lab var year "Year"
lab var yob "Year of birth"
compress
sort idnr year
* Saved once here so the threshold table built below can be merged back
* onto the full panel.
save "use\incpanel2", replace
*Creating an employment variable based on 20 percent of annual male median earnings>0
* The median is taken per year over men (woman==0) with strictly positive,
* non-missing earnings.
keep if earn>0 & earn!=. & woman==0
bysort year: egen p50=pctile(earn), p(50)
keep year p50
* p50 is constant within year, so one row per year is enough.
duplicates drop year, force
replace p50=p50*0.2
lab var p50 "20 percent of annual median income"
merge 1:m year using "use\incpanel2", nogen
* Stata treats a missing value as larger than any number, so earn>=p50 is
* true when earn is missing. Without the if-qualifier every person-year with
* no earnings record would be coded as employed. emp is left missing when
* either earnings or the threshold is missing.
g emp=earn>=p50 if !missing(earn, p50)
lab var emp "Employed income defined"
compress
sort idnr year
save "use\incpanel2", replace

*3) Children's earnings at different age intervals
* Builds use\earn_children2: one row per idnr with mean earnings over ages
* 29-41 and over four narrower age windows, plus the number of observed
* earnings in each window.
* -clear- is the documented option of -use-.
use idnr yob year earn age woman if earn!=. & (age>=29 & age<=41) using "use\incpanel2", clear
g earn1=earn if inrange(age,29,31)
g earn2=earn if inrange(age,34,36)
g earn3=earn if inrange(age,39,41)
g earn4=earn if inrange(age,30,36)
*Dropping individuals with fewer than 2 observed earnings at age 29-41
* count() counts non-missing values, so years with zero earnings are
* included in the count. The threshold is not restricted to positive earnings.
bysort idnr: egen nearn=count(earn)
drop if nearn<2
* Number of observed earnings within each age window.
forvalues i=1/4 {
	bysort idnr: egen nearn`i'=count(earn`i')
}
*Averaging earnings over time
* yob, woman and nearn1-nearn4 are constant within idnr, so their mean is
* the value itself. nearn is not listed and is therefore dropped.
collapse (mean) earn earn1 earn2 earn3 earn4 yob woman nearn1 nearn2 nearn3 nearn4, by(idnr)
lab var earn "earnings age 29-41"
lab var earn1 "earnings age 29-31"
lab var earn2 "earnings age 34-36"
lab var earn3 "earnings age 39-41"
lab var earn4 "earnings age 30-36"
lab var nearn1 "number of observed incomes age 29-31"
lab var nearn2 "number of observed incomes age 34-36"
lab var nearn3 "number of observed incomes age 39-41"
lab var nearn4 "number of observed incomes age 30-36"
lab var yob "year of birth"
compress
sort idnr
save "use\earn_children2", replace

*4) Parental earnings at different age intervals
* Builds use\earn_parents2: one row per idnr with mean earnings over ages
* 42-58 and over three overlapping age windows, plus the number of observed
* earnings in each window.
* -clear- is the documented option of -use-.
use idnr yob year earn age woman if earn!=. & (age>=42 & age<=58) using "use\incpanel2", clear
g earn1=earn if inrange(age,42,48)
g earn2=earn if inrange(age,46,52)
g earn3=earn if inrange(age,52,58)
*Dropping all individuals with fewer than 2 observed earnings at age 42-58
* count() counts non-missing values, so years with zero earnings are
* included in the count. The threshold is not restricted to positive earnings.
bysort idnr: egen nearn=count(earn)
drop if nearn<2
* Number of observed earnings within each age window.
forvalues i=1/3 {
	bysort idnr: egen nearn`i'=count(earn`i')
}
*Averaging earnings over time
* yob, woman and nearn1-nearn3 are constant within idnr, so their mean is
* the value itself. nearn is not listed and is therefore dropped.
collapse (mean) earn earn1 earn2 earn3 yob woman nearn1 nearn2 nearn3, by(idnr)
lab var earn "earnings age 42-58"
lab var earn1 "earnings age 42-48"
lab var earn2 "earnings age 46-52"
lab var earn3 "earnings age 52-58"
lab var nearn1 "number of observed incomes age 42-48"
lab var nearn2 "number of observed incomes age 46-52"
lab var nearn3 "number of observed incomes age 52-58"
lab var yob "year of birth"
compress
sort idnr
save "use\earn_parents2", replace

********************************************************************************
*1) Parental earnings per child cohort: 5 years of (potential) earnings starting when the child is 12, adjusting for observation gaps in these years: 1969, 1972, 1974, 1977-78, 1981, and 1983-84
* For every child cohort 1952-1994 a file use\earnCC_2 is written, where CC
* is the two-digit cohort. Each file has one row per idnr with mean
* earnings, mean employment, yob, woman, the number of observed earnings
* and the mean observation year (obsy) within the cohort's calendar window.
* Cohorts that share a window get identical files, so the window is
* computed once and saved once per cohort.

* Helper: _save_parent_window first last ylo yhi
*   first, last : first and last two-digit child cohort sharing the window
*   ylo, yhi    : first and last calendar year of the window (inclusive)
capture program drop _save_parent_window
program define _save_parent_window
	args first last ylo yhi
	* -clear- is the documented option of -use-.
	use if year>=`ylo' & year<=`yhi' & earn!=. using "use\incpanel2", clear
	bysort idnr: egen nearn=count(earn)
	collapse (mean) earn emp yob woman nearn year, by(idnr)
	rename year obsy
	sort idnr
	compress
	forvalues c=`first'/`last' {
		save "use\earn`c'_2", replace
	}
end

*1952-1956 cohort
_save_parent_window 52 56 1968 1975
*1957-1958 cohort
_save_parent_window 57 58 1969 1976
*1959 cohort
_save_parent_window 59 59 1971 1979
*1960-1961 cohort
_save_parent_window 60 61 1972 1980
*1962-1963 cohort
_save_parent_window 62 63 1974 1982
*1964 cohort
_save_parent_window 64 64 1976 1985
*1965-1967 cohort
_save_parent_window 65 67 1977 1986
*1968 cohort
_save_parent_window 68 68 1980 1987
*1969-1970 cohort
_save_parent_window 69 70 1981 1988
*1971-1972 cohort
_save_parent_window 71 72 1983 1989
*1973-1994 cohort
* From 1985 onward the window is the five calendar years in which the
* child is 12 to 16.
forvalues c=73/94 {
	local ylo=19`c'+12
	local yhi=19`c'+16
	_save_parent_window `c' `c' `ylo' `yhi'
}
program drop _save_parent_window

*2) Earnings panel
* Child person-years for cohorts 1952-1994 at ages 25-48 with observed
* earnings. -clear- is the documented option of -use-.
use if yob>=1952 & yob<=1994 & age>=25 & age<=48 & earn!=. using "use\incpanel2", clear
*2a) Adding parents
sort idnr
merge m:1 idnr using "use\multigen", keepusing(midnr fidnr myob fyob bcountry mbcountry fbcountry) keep(1 3) nogen
*2b) Adding parental earnings and employment
* The cohort files are keyed on idnr and carry variables named earn and emp.
* The child's own idnr, earn and emp are therefore parked under temporary
* names while each parent's id takes the name idnr in turn.
rename (idnr earn emp) (temp1 temp2 temp3)
* Mothers first (prefix m), then fathers (prefix f).
foreach p in m f {
	rename `p'idnr idnr
	sort idnr
	* Cohort 1952 creates the four parent variables. Values are blanked
	* for children of every other cohort.
	merge m:1 idnr using "use\earn52_2", keepusing(earn nearn obsy emp) nogen keep(1 3)
	foreach v in earn nearn obsy emp {
		replace `v'=. if yob!=1952
		rename `v' `p'`v'
	}
	* Each later cohort file is merged in turn. Only children born in
	* that cohort take their parent's values from it.
	forvalues i=53/94 {
		merge m:1 idnr using "use\earn`i'_2", keepusing(earn nearn obsy emp) nogen keep(1 3)
		foreach v in earn nearn obsy emp {
			replace `p'`v'=`v' if yob==19`i'
		}
		drop earn nearn obsy emp
	}
	rename idnr `p'idnr
}
* Restore the child's own identifier, earnings and employment.
rename (temp1 temp2 temp3) (idnr earn emp)
* A parent with no match has zero observed earnings.
mvencode fnearn mnearn, mv(0)
lab var fearn "father's earnings"
lab var mearn "mother's earnings"
lab var fnearn "observations father's earnings"
lab var mnearn "observations mother's earnings"
lab var fobsy "year father's earnings observed"
lab var mobsy "year mother's earnings observed"
lab var femp "father's employment"
lab var memp "mother's employment"
*2c) Adding education
* use\edu_max is keyed on idnr. The child's id is parked under a temporary
* name while the father's and then the mother's id take the name idnr,
* and the merged edlev and edu are given the parent's prefix.
rename idnr temp
foreach p in f m {
	rename `p'idnr idnr
	sort idnr
	merge m:1 idnr using "use\edu_max", keepusing(edlev edu) keep(1 3) nogen
	rename (edlev edu idnr) (`p'edlev `p'edu `p'idnr)
}
* Finally the child's own education, stored unprefixed as edlev and edu.
rename temp idnr
sort idnr
merge m:1 idnr using "use\edu_max", keepusing(edlev edu) keep(1 3) nogen
*2d) Imposing restrictions
*i) Keeping children with at least one known parent
keep if !missing(fidnr) | !missing(midnr)
*ii) Blanking parental income averages built on fewer than 2 observed incomes
* Only the earnings average is blanked. femp and memp are left as they are.
foreach p in f m {
	replace `p'earn=. if `p'nearn<2
}
*iii) Keeping children with income for at least one parent
keep if fearn!=. | mearn!=.
*2e) Generating combined parental income, employment and education measures
* The row functions ignore missing arguments, so a child with one usable
* parent gets that parent's value.
egen fmearn=rowmean(fearn mearn)
egen fmmax=rowmax(fearn mearn)
egen fmedu=rowmean(fedu medu)
egen fmemp=rowmean(femp memp)
lab var fmearn "average parental earnings"
lab var fmmax "max of parental earnings"
lab var fmedu "average parental education"
lab var fmemp "average parental employment"
*2f) Ranks for child earnings and parental earnings by (child's) cohort and gender (of child)
* For each variable x, px is the rank rescaled to 0-100 within cells of
* child birth year and child sex: (rank-1)/(count-1)*100.
* A cell with a single non-missing value gives a missing px (division by zero).
foreach x in fmearn fmmax earn fearn mearn {
	bysort yob woman: egen cell_n=count(`x')
	bysort yob woman: egen cell_rank=rank(`x')
	g p`x'=(cell_rank-1)/(cell_n-1)*100
	drop cell_n cell_rank
}

*2g) Ranks for child earnings by (child's) cohort only. For pooled sons and daughters
bysort yob: egen cell_n=count(earn)
bysort yob: egen cell_rank=rank(earn)
g pearn_p=(cell_rank-1)/(cell_n-1)*100
drop cell_n cell_rank
lab var pearn "earnings rank by yob and gender"
lab var pearn_p "earnings rank by yob pooled"
lab var pfearn "father's earnings rank by child's yob and gender"
lab var pmearn "mother's earnings rank by child's yob and gender"
lab var pfmearn "parental earnings rank by child's yob and gender"
lab var pfmmax "parental earnings rank of max average"
compress
sort idnr year
save "use\swepanel_zeros", replace